qui {

 noisily {
/**********************************************************************************************/
/**********************************************************************************************/
/*********************** Step 2. Semi-parametric route: First-step ****************************/
/**********************************************************************************************/
/**********************************************************************************************/

/*
To estimate the net per capita GDP gain of secession for each NIC in the sample, this route simulates the counterfactual per capita GDP trajectory that each NIC would have followed absent state break-up
and compares it with the trajectory actually observed after secession.
*/

	/*************************************************/
	/************* Step 2.1. Data construction *******/
	/*************************************************/
}

	* General set-up

		* Identify independence year(s) of each country.
		* For every integer country code from 1 up to the largest code in memory, store the
		* earliest and the latest year in which independence_years equals 0. Codes without
		* such a year (or without any observations) keep missing values in both variables.
		gen independence_year_first = .
		gen independence_year_second = .

		sum cntrycode
		local top_code = r(max)
		forvalues code = 1/`top_code' {
			sum year if independence_years == 0 & cntrycode == `code'
			* Explicit guard instead of a blanket capture, so that genuine errors are not hidden
			if r(N) > 0 {
				* Copy the results into locals before any other command runs
				local first_event = r(min)
				local last_event = r(max)
				replace independence_year_first = `first_event' if cntrycode == `code'
				replace independence_year_second = `last_event' if cntrycode == `code'
				}
			}
		
		* Add rows to collect placebo-results for each NIC
		// Appends empty observations below the original data and labels them as consecutive
		// pseudo-countries: names Pseudogap_1, Pseudogap_2, ... with codes 1000, 1001, ...,
		// each cycling through the years from the start year up to and including 2016.
		// The row counts are kept both as locals and as scalars, as in the original set-up.
		local originalrows = _N
		scalar originalrows = `originalrows'
		local aditionalrows = 216*(2015-${startyear})
		scalar aditionalrows = `aditionalrows'

		local maxrows = `originalrows' + 216*(2016-${startyear})
		scalar maxrows = `maxrows'
		set obs `maxrows'

		// NOTE(review): three different year constants are in play (2015 for aditionalrows, 2016 for
		// maxrows, and a cycle of 2017 minus the start year per pseudo-country), so the appended rows
		// do not divide evenly into complete pseudo-country panels - confirm which one is intended.
		// NOTE(review): the very last appended row is left unfilled (year, cntry and cntrycode stay
		// missing) because filling stops one row before maxrows - confirm this is intended.
		local first_new = `originalrows' + 1
		local last_filled = `maxrows' - 1
		local cycle = 2017 - ${startyear}

		if `last_filled' >= `first_new' {
			// Position within the appended range determines the year and the pseudo-country index
			replace year = ${startyear} + mod(_n - `first_new', `cycle') in `first_new'/`last_filled'
			replace cntry = "Pseudogap_" + string(floor((_n - `first_new')/`cycle') + 1) in `first_new'/`last_filled'
			replace cntrycode = 1000 + floor((_n - `first_new')/`cycle') in `first_new'/`last_filled'
			}
					
		* Save dataset
		// Move from the current directory to its sibling folder with the intermediary results and
		// store the master data there. The working directory stays in that folder afterwards; the
		// commands that follow load starting_data by its bare name.
		// Forward slashes are used because they are accepted on every platform.
		cd "../3. Intermediary results"
		save starting_data, replace

		* Construct NIC-specific subsamples to implement the SCM
		// Note: the synth command requires a balanced panel.
		//       As pre-independence data availability differs across NICs, we construct a balanced sample to implement the SCM for each separate NIC and each separate specification.

			* Identify all NICs
			// Country codes with an independence event (independence_years equal to 0) from the start
			// year onwards. Excluded: 95 German Democratic Republic, 96 German Federal Republic,
			// 97 Germany and 308 Yemen, i.e. unifications (not secessions) and the East/West German
			// case (countries that no longer exist today).
			levelsof cntrycode if year >= $startyear & independence_years == 0 & !inlist(cntrycode, 95, 96, 97, 308), local(NICs)
			global NICs `NICs'

			// Progress denominator: the number of NICs actually looped over. Counting observations
			// instead would include the excluded codes and count repeat independence events twice.
			local max : word count `NICs'

			noi di "Preparing SCM subsample for NIC (/`max'):"
			local count = 0

			* Construct necessary subsamples
			* Construct necessary subsamples
			foreach NIC in $NICs {

				* Builds, for one NIC, the two donor-pool subsamples used by the SCM:
				*   data_10_<code>: covariates tracked from at most 10 years before independence
				*   data_15_<code>: covariates tracked from at most 15 years before independence (cross validation)
				* Reads : starting_data.dta in the working directory, globals startyear and SCM_covariates
				* Writes: both files in the subfolder 3.4. Intermediaries
				* Leaves: scalars last_year, earliest_year and one earliest_year_<covariate> per covariate,
				*         plus global SCM_covariates_1, all holding the values of the 15-year pass
				* The working directory is never changed; files are addressed by relative path.

				* Check progress
				local count = `count'+1
				noi di `count', _continue

				* Check whether the subsample needs to be constructed (the 15-year file is written last)
				capture confirm file "./3.4. Intermediaries/data_15_`NIC'.dta"
				if _rc != 0 {

					* Pass 1: window of 10 years. Pass 2: window of 15 years, cross validation.
					foreach window in 10 15 {

						* Load master data
						use starting_data, clear

						* Get relevant data on year of independence
						* NOTE(review): with two independence events for one code this is the mean of both years - confirm intended
						sum year if cntrycode == `NIC' & year >= $startyear & independence_years == 0
						local independence_year = r(mean)
						local preindependence_year_1 = r(mean) - 1
						local preindependence_year_5 = r(mean) - 5
						local preindependence_year_10 = r(mean) - 10
						local preindependence_year_15 = r(mean) - 15
						local window_start = `preindependence_year_`window''

						* Check earliest data availability for the growth determinants in the country under research,
						* going back no further than the start of the window; 9999 flags a covariate without any data
						foreach var in $SCM_covariates {
							sum year if `var' != . & cntrycode == `NIC'
							if r(N) == 0 {
								scalar earliest_year_`var' = 9999
								}
							else {
								scalar earliest_year_`var' = cond(`window_start' >= r(min), `window_start', r(min))
								}
							}

						* Check latest gdpcap data, 30 years onwards suffices; -999 flags that none is available
						sum year if cntrycode == `NIC' & independence_years > 0 & independence_years <= 30 & baseline_gdpcap != . & year > $startyear
						scalar last_year = cond(r(N) > 0, r(max), -999)

						* Determine covariates with pre-independence data available for the NIC under consideration
						global SCM_covariates_1
						scalar earliest_year = -999

						foreach var in $SCM_covariates {
							if scalar(earliest_year_`var') <= `preindependence_year_1' {
								global SCM_covariates_1 $SCM_covariates_1 `var'
								scalar earliest_year = max(scalar(earliest_year), scalar(earliest_year_`var'))
								}
							}

						* Nothing is built or saved when the NIC has no usable post-independence gdpcap data
						if scalar(last_year) != -999 {

							* Restrict the donor pool. The full-window case and the shortened-window case
							* apply exactly the same steps, so they are handled by a single condition.
							if scalar(earliest_year) <= `window_start' | scalar(earliest_year) < `independence_year' {

								* Drop placebo countries and countries violating no-contamination constraint
								drop if cntrycode > 500 // Drop data on placebo gaps: irrelevant
								drop if independence_year_first <= (`independence_year' + 10) & independence_year_first > (`independence_year' - 10) & cntrycode != `NIC' 		// Remove countries that *themselves* became independent 10 years before or after the NIC under research (avoid contamination of control group)
								drop if independence_year_second <= (`independence_year' + 10) & independence_year_second > (`independence_year' - 10) & cntrycode != `NIC' 	// Remove countries that *themselves* became independent 10 years before or after the NIC under research (avoid contamination of control group)

								scalar earliest_year = -999 // Will get filled here

								levelsof cntrycode if cntrycode < 500, local(donors)
								foreach cntry of local donors {

									* Drop countries from the sample with missing data where there is data for the country under research
									foreach var in $SCM_covariates_1 {
										sum `var' if year == scalar(earliest_year_`var') & cntrycode == `cntry'
										drop if cntrycode == `cntry' & r(mean) == .
										sum `var' if year == `preindependence_year_1' & cntrycode == `cntry'
										drop if cntrycode == `cntry' & r(mean) == .
										}

									* Drop countries from the sample with missing data in the post-treatment period
									sum baseline_gdpcap if year == scalar(last_year) & cntrycode == `cntry' & independence_years > 0
									drop if cntrycode == `cntry' & r(mean) == .

									* Track the latest first year with gdpcap data among the countries that remain
									* (a country dropped above has no observations left and is skipped here)
									sum year if baseline_gdpcap != . & cntrycode == `cntry'
									if r(min) > scalar(earliest_year) & r(min) != . {
										scalar earliest_year = r(min)
										}
									}
								}

							* Window-specific settings for the recorded information
							* NOTE(review): in the 15-year pass the variable preindependence_year_1 is filled with the
							* year 5 years before independence and mspe_end with the year 10 years before - presumably
							* the cross-validation split; confirm against the SCM step that reads these files
							if `window' == 10 {
								local covariate_end = `preindependence_year_1'
								local mspe_end = `preindependence_year_1'
								}
							else {
								local covariate_end = `preindependence_year_5'
								local mspe_end = `preindependence_year_10'
								}

							* Record information for use in SCM-algorithm
							* Values are read from the scalars through scalar() so that they can neither be stale
							* nor be confused with the equally named variables created here
							gen independence_year = `independence_year'
							gen preindependence_year_1 = `covariate_end'
							foreach var in $SCM_covariates_1 {
								gen earliest_year_`var' = scalar(earliest_year_`var')
								}
							gen earliest_year = scalar(earliest_year)
							gen last_year = scalar(last_year)
							gen mspe_begin = scalar(earliest_year_baseline_gdpcap)
							gen mspe_end = `mspe_end'
							gen results_begin = scalar(earliest_year_baseline_gdpcap)
							gen results_end = scalar(last_year)

							* Save subsample
							* NOTE(review): the keep list below does not retain mspe_begin, mspe_end, results_begin
							* and results_end, so they are not stored in the saved file - confirm whether intended
							keep cntry cntrycode year independence_years $SCM_covariates_1 independence_year preindependence_* earliest_* last_*
							save "./3.4. Intermediaries/data_`window'_`NIC'", replace
							}
						}
					}
				}
								
		* Reroute to directory containing dofiles
		// One relative move from the current directory to its sibling folder with the dofiles;
		// forward slashes are accepted on every platform.
		cd "../1. Dofiles"
		}
